{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 371,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "'1.0.3'"
     },
     "metadata": {},
     "execution_count": 371
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "pd.__version__"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "['军训', '思修', '史纲', '马原', '毛概', '体育1', '体育2']\n[65, 80, 75, 83, 77, 100, 98]\n"
    }
   ],
   "source": [
    "# 普通列表\n",
    "subjects = [\"军训\", \"思修\", \"史纲\", \"马原\", \"毛概\", \"体育1\", \"体育2\"]\n",
    "score = [65, 80, 75, 83, 77, 100, 98]\n",
    "print(subjects)\n",
    "print(score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 372,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "0     军训\n1     思修\n2     史纲\n3     马原\n4     毛概\n5    体育1\n6    体育2\nName: subjects, dtype: object\n0     65\n1     80\n2     75\n3     83\n4     77\n5    100\n6     98\nName: score, dtype: int64\n"
    }
   ],
   "source": [
    "# 构造Series\n",
    "s1 = pd.Series(subjects, name=\"subjects\")\n",
    "s2 = pd.Series(score, name=\"score\")\n",
    "print(s1)\n",
    "print(s2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 373,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "  Subject  Score\n0      军训     65\n1      思修     80\n2      史纲     75\n3      马原     83\n4      毛概     77\n5     体育1    100\n6     体育2     98",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Subject</th>\n      <th>Score</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>军训</td>\n      <td>65</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>思修</td>\n      <td>80</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>史纲</td>\n      <td>75</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>马原</td>\n      <td>83</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>毛概</td>\n      <td>77</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>体育1</td>\n      <td>100</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>体育2</td>\n      <td>98</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 373
    }
   ],
   "source": [
    "# 把两个Series合并为一个字典，然后创建DataFrame\n",
    "df = pd.DataFrame({\"Subject\": s1, \"Score\": s2})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 379,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "      subject  year  score  credits property\n0    程序设计基础A6  2019   96.0        1       任选\n1      机械制图D5  2018   79.0        4       任选\n2    程序设计基础B3  2016  100.0        5       必修\n3       微积分C1  2016   65.0        3       必修\n4       微积分A8  2015   85.0        5       必修\n..        ...   ...    ...      ...      ...\n96     大学物理A6  2016   82.0        1       限选\n97   程序设计基础D5  2016   62.0        2       限选\n98      微积分D6  2019   86.0        3       限选\n99   电工电子技术C2  2015   66.0        4       任选\n100  程序设计基础B4  2015   61.0        4       限选\n\n[101 rows x 5 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>subject</th>\n      <th>year</th>\n      <th>score</th>\n      <th>credits</th>\n      <th>property</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>程序设计基础A6</td>\n      <td>2019</td>\n      <td>96.0</td>\n      <td>1</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>机械制图D5</td>\n      <td>2018</td>\n      <td>79.0</td>\n      <td>4</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>程序设计基础B3</td>\n      <td>2016</td>\n      <td>100.0</td>\n      <td>5</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>微积分C1</td>\n      <td>2016</td>\n      <td>65.0</td>\n      <td>3</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>微积分A8</td>\n      <td>2015</td>\n      <td>85.0</td>\n      <td>5</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>96</th>\n      <td>大学物理A6</td>\n      <td>2016</td>\n      <td>82.0</td>\n      <td>1</td>\n      <td>限选</td>\n    </tr>\n    <tr>\n      <th>97</th>\n      <td>程序设计基础D5</td>\n      <td>2016</td>\n      <td>62.0</td>\n      <td>2</td>\n      <td>限选</td>\n    </tr>\n    <tr>\n      <th>98</th>\n      <td>微积分D6</td>\n      <td>2019</td>\n      <td>86.0</td>\n      <td>3</td>\n      <td>限选</td>\n    </tr>\n    <tr>\n      <th>99</th>\n      <td>电工电子技术C2</td>\n      <td>2015</td>\n      <td>66.0</td>\n      <td>4</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>100</th>\n      <td>程序设计基础B4</td>\n      <td>2015</td>\n      <td>61.0</td>\n      <td>4</td>\n      <td>限选</td>\n    </tr>\n  </tbody>\n</table>\n<p>101 rows × 5 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 379
    }
   ],
   "source": [
    "# 读取csv文件，直接得到DataFrame\n",
    "df = pd.read_csv(\"score.csv\",encoding=\"gbk\")\n",
    "df\n",
    "\n",
    "# df.head()#查看数据的前几行\n",
    "# df.tail()#查看数据的最后几行\n",
    "# df.describe()#一键查看数据描述"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1 常规操作：索引与切片、增删查改、排序\n",
    "请务必做一下作业：10 Minutes to pandas\n",
    "\n",
    "https://pandas.pydata.org/pandas-docs/version/0.22.0/10min.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 380,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "0       96.0\n1       79.0\n2      100.0\n3       65.0\n4       85.0\n       ...  \n96      82.0\n97      62.0\n98      86.0\n99      66.0\n100     61.0\nName: score, Length: 101, dtype: float64"
     },
     "metadata": {},
     "execution_count": 380
    }
   ],
   "source": [
    "# df[\"score\"] # 按列索引\n",
    "df.iloc[2] # 按行索引\n",
    "\n",
    "# 还可以选择多行、选择多列、选择特定列、选择单个值、选择部分值……\n",
    "# 略过基本的增删查改操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 384,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "     subject  year  score  credits property\n0   程序设计基础A6  2019   96.0        1       任选\n22      体育D7  2020   98.0        1       任选\n38    线性代数A3  2019  100.0        1       限选\n51     微积分C2  2020  100.0        1       任选\n60  电工电子技术B3  2020   95.0        4       任选\n62  程序设计基础C7  2020   99.0        5       任选\n77      体育B7  2019  100.0        5       必修\n80    大学物理C2  2019   97.0        3       必修",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>subject</th>\n      <th>year</th>\n      <th>score</th>\n      <th>credits</th>\n      <th>property</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>程序设计基础A6</td>\n      <td>2019</td>\n      <td>96.0</td>\n      <td>1</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>22</th>\n      <td>体育D7</td>\n      <td>2020</td>\n      <td>98.0</td>\n      <td>1</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>38</th>\n      <td>线性代数A3</td>\n      <td>2019</td>\n      <td>100.0</td>\n      <td>1</td>\n      <td>限选</td>\n    </tr>\n    <tr>\n      <th>51</th>\n      <td>微积分C2</td>\n      <td>2020</td>\n      <td>100.0</td>\n      <td>1</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>60</th>\n      <td>电工电子技术B3</td>\n      <td>2020</td>\n      <td>95.0</td>\n      <td>4</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>62</th>\n      <td>程序设计基础C7</td>\n      <td>2020</td>\n      <td>99.0</td>\n      <td>5</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>77</th>\n      <td>体育B7</td>\n      <td>2019</td>\n      <td>100.0</td>\n      <td>5</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>80</th>\n      <td>大学物理C2</td>\n      <td>2019</td>\n      <td>97.0</td>\n      <td>3</td>\n      <td>必修</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 384
    }
   ],
   "source": [
    "# 根据逻辑表达式进行索引\n",
    "# df[df[\"score\"]>=90]\n",
    "\n",
    "df[(df[\"score\"]>=95) & (df[\"year\"]>=2019)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 386,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "     subject  year  score  credits property\n71  程序设计基础D5  2016   55.0        3       限选\n86     微积分C5  2016   56.0        5       任选\n57  程序设计基础A5  2015   57.0        2       任选\n7   电工电子技术D4  2016   57.0        5       必修\n45    机械制图D8  2017   58.0        2       限选\n..       ...   ...    ...      ...      ...\n77      体育B7  2019  100.0        5       必修\n9   电工电子技术C7  2020    NaN        5       必修\n37    大学物理B3  2018    NaN        3       必修\n54    机械制图B3  2019    NaN        4       必修\n63    线性代数D7  2018    NaN        4       任选\n\n[101 rows x 5 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>subject</th>\n      <th>year</th>\n      <th>score</th>\n      <th>credits</th>\n      <th>property</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>71</th>\n      <td>程序设计基础D5</td>\n      <td>2016</td>\n      <td>55.0</td>\n      <td>3</td>\n      <td>限选</td>\n    </tr>\n    <tr>\n      <th>86</th>\n      <td>微积分C5</td>\n      <td>2016</td>\n      <td>56.0</td>\n      <td>5</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>57</th>\n      <td>程序设计基础A5</td>\n      <td>2015</td>\n      <td>57.0</td>\n      <td>2</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>电工电子技术D4</td>\n      <td>2016</td>\n      <td>57.0</td>\n      <td>5</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>45</th>\n      <td>机械制图D8</td>\n      <td>2017</td>\n      <td>58.0</td>\n      <td>2</td>\n      <td>限选</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>77</th>\n      <td>体育B7</td>\n      <td>2019</td>\n      <td>100.0</td>\n      <td>5</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>电工电子技术C7</td>\n      <td>2020</td>\n      <td>NaN</td>\n      <td>5</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>37</th>\n      <td>大学物理B3</td>\n      <td>2018</td>\n      <td>NaN</td>\n      <td>3</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>54</th>\n      <td>机械制图B3</td>\n      <td>2019</td>\n      <td>NaN</td>\n      <td>4</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>63</th>\n      <td>线性代数D7</td>\n      <td>2018</td>\n      <td>NaN</td>\n      <td>4</td>\n      <td>任选</td>\n    </tr>\n  </tbody>\n</table>\n<p>101 rows × 5 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 386
    }
   ],
   "source": [
    "# 排序\n",
    "df2 = df.sort_values(by=\"score\",ascending=True)\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 387,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "5    27\n4    21\n1    21\n3    17\n2    15\nName: credits, dtype: int64"
     },
     "metadata": {},
     "execution_count": 387
    }
   ],
   "source": [
    "# 统计出现次数\n",
    "df[\"credits\"].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2 缺失项处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 389,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "     subject  year  score  credits property\n71  程序设计基础D5  2016   55.0        3       限选\n86     微积分C5  2016   56.0        5       任选\n57  程序设计基础A5  2015   57.0        2       任选\n7   电工电子技术D4  2016   57.0        5       必修\n45    机械制图D8  2017   58.0        2       限选\n..       ...   ...    ...      ...      ...\n77      体育B7  2019  100.0        5       必修\n9   电工电子技术C7  2020    0.0        5       必修\n37    大学物理B3  2018    0.0        3       必修\n54    机械制图B3  2019    0.0        4       必修\n63    线性代数D7  2018    0.0        4       任选\n\n[101 rows x 5 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>subject</th>\n      <th>year</th>\n      <th>score</th>\n      <th>credits</th>\n      <th>property</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>71</th>\n      <td>程序设计基础D5</td>\n      <td>2016</td>\n      <td>55.0</td>\n      <td>3</td>\n      <td>限选</td>\n    </tr>\n    <tr>\n      <th>86</th>\n      <td>微积分C5</td>\n      <td>2016</td>\n      <td>56.0</td>\n      <td>5</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>57</th>\n      <td>程序设计基础A5</td>\n      <td>2015</td>\n      <td>57.0</td>\n      <td>2</td>\n      <td>任选</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>电工电子技术D4</td>\n      <td>2016</td>\n      <td>57.0</td>\n      <td>5</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>45</th>\n      <td>机械制图D8</td>\n      <td>2017</td>\n      <td>58.0</td>\n      <td>2</td>\n      <td>限选</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>77</th>\n      <td>体育B7</td>\n      <td>2019</td>\n      <td>100.0</td>\n      <td>5</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>电工电子技术C7</td>\n      <td>2020</td>\n      <td>0.0</td>\n      <td>5</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>37</th>\n      <td>大学物理B3</td>\n      <td>2018</td>\n      <td>0.0</td>\n      <td>3</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>54</th>\n      <td>机械制图B3</td>\n      <td>2019</td>\n      <td>0.0</td>\n      <td>4</td>\n      <td>必修</td>\n    </tr>\n    <tr>\n      <th>63</th>\n      <td>线性代数D7</td>\n      <td>2018</td>\n      <td>0.0</td>\n      <td>4</td>\n      <td>任选</td>\n    </tr>\n  </tbody>\n</table>\n<p>101 rows × 5 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 389
    }
   ],
   "source": [
    "# 处理缺失值\n",
    "# df = df2.dropna() # 去除有缺失值的项\n",
    "df = df2.fillna(0) # 使用默认值填充确实项\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.3 函数应用\n",
    "分组：groupby, 聚合：agg，使用自定义函数：apply\n",
    "\n",
    "非常类似于Excel操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 392,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 分组与聚合\n",
    "b = df.groupby('year')['score','credits']\n",
    "# list(b)\n",
    "# b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 394,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "                   score  credits\nyear property                    \n2015 任选        80.000000       26\n     必修        85.750000       13\n     限选        83.000000       26\n2016 任选        80.500000       15\n     必修        76.000000       28\n     限选        72.875000       22\n2017 任选        70.500000        8\n     必修        84.750000       13\n     限选        76.666667       32\n2018 任选        67.750000       14\n     必修        68.125000       24\n     限选        86.666667        9\n2019 任选        82.800000       15\n     必修        67.800000       17\n     限选        77.555556       21\n2020 任选        91.333333       17\n     必修        56.750000       11\n     限选        75.666667       10",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th></th>\n      <th>score</th>\n      <th>credits</th>\n    </tr>\n    <tr>\n      <th>year</th>\n      <th>property</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th rowspan=\"3\" valign=\"top\">2015</th>\n      <th>任选</th>\n      <td>80.000000</td>\n      <td>26</td>\n    </tr>\n    <tr>\n      <th>必修</th>\n      <td>85.750000</td>\n      <td>13</td>\n    </tr>\n    <tr>\n      <th>限选</th>\n      <td>83.000000</td>\n      <td>26</td>\n    </tr>\n    <tr>\n      <th rowspan=\"3\" valign=\"top\">2016</th>\n      <th>任选</th>\n      <td>80.500000</td>\n      <td>15</td>\n    </tr>\n    <tr>\n      <th>必修</th>\n      <td>76.000000</td>\n      <td>28</td>\n    </tr>\n    <tr>\n      <th>限选</th>\n      <td>72.875000</td>\n      <td>22</td>\n    </tr>\n    <tr>\n      <th rowspan=\"3\" valign=\"top\">2017</th>\n      <th>任选</th>\n      <td>70.500000</td>\n      <td>8</td>\n    </tr>\n    <tr>\n      <th>必修</th>\n      <td>84.750000</td>\n      <td>13</td>\n    </tr>\n    <tr>\n      <th>限选</th>\n      <td>76.666667</td>\n      <td>32</td>\n    </tr>\n    <tr>\n      <th rowspan=\"3\" valign=\"top\">2018</th>\n      <th>任选</th>\n      <td>67.750000</td>\n      <td>14</td>\n    </tr>\n    <tr>\n      <th>必修</th>\n      <td>68.125000</td>\n      <td>24</td>\n    </tr>\n    <tr>\n      <th>限选</th>\n      <td>86.666667</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th rowspan=\"3\" valign=\"top\">2019</th>\n      <th>任选</th>\n      <td>82.800000</td>\n      <td>15</td>\n    </tr>\n    <tr>\n      <th>必修</th>\n      <td>67.800000</td>\n      <td>17</td>\n    </tr>\n    <tr>\n      <th>限选</th>\n      <td>77.555556</td>\n      <td>21</td>\n    </tr>\n    <tr>\n      <th rowspan=\"3\" valign=\"top\">2020</th>\n      <th>任选</th>\n      <td>91.333333</td>\n      <td>17</td>\n    </tr>\n    <tr>\n      <th>必修</th>\n      <td>56.750000</td>\n      <td>11</td>\n    </tr>\n    <tr>\n      <th>限选</th>\n      <td>75.666667</td>\n      <td>10</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 394
    }
   ],
   "source": [
    "# 分组与聚合\n",
    "# b = df.groupby('year')['score','credits']\n",
    "# list(b)\n",
    "\n",
    "# 分组 + 聚合\n",
    "# b = df.groupby('year')['score','credits'].agg(\"mean\")\n",
    "b = df.groupby(['year','property'])['score','credits'].agg({\"score\":\"mean\",\"credits\":\"sum\"})\n",
    "b\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 364,
   "metadata": {},
   "outputs": [],
   "source": [
    "def score_to_grade(score):\n",
    "    if score == 100:\n",
    "        grade = \"A+\"\n",
    "    elif score >= 95:\n",
    "        grade = \"A\"\n",
    "    elif score >= 90:\n",
    "        grade = \"A-\"\n",
    "    elif score >= 85:\n",
    "        grade = \"B+\"\n",
    "    elif score >= 80:\n",
    "        grade = \"B\"\n",
    "    elif score >= 77:\n",
    "        grade = \"B-\"\n",
    "    elif score >= 73:\n",
    "        grade = \"C+\"\n",
    "    elif score >= 70:\n",
    "        grade = \"C\"\n",
    "    elif score >= 67:\n",
    "        grade = \"C-\"\n",
    "    elif score >= 63:\n",
    "        grade = \"D+\"\n",
    "    elif score >= 60:\n",
    "        grade = \"D\"\n",
    "    else:\n",
    "        grade = \"F\"\n",
    "    return grade\n",
    "\n",
    "\n",
    "grade_to_GPA = {\n",
    "    \"A+\": 4.0,\n",
    "    \"A\": 4.0,\n",
    "    \"A-\": 4.0,\n",
    "    \"B+\": 3.6,\n",
    "    \"B\": 3.3,\n",
    "    \"B-\": 3.0,\n",
    "    \"C+\": 2.6,\n",
    "    \"C\": 2.3,\n",
    "    \"C-\": 2.0,\n",
    "    \"D+\": 1.6,\n",
    "    \"D\": 1.3,\n",
    "    \"F\": 0,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 397,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "     subject  year  score  credits property grade  GPA\n63    线性代数D7  2018    0.0        4       任选     F  0.0\n37    大学物理B3  2018    0.0        3       必修     F  0.0\n9   电工电子技术C7  2020    0.0        5       必修     F  0.0\n54    机械制图B3  2019    0.0        4       必修     F  0.0\n71  程序设计基础D5  2016   55.0        3       限选     F  0.0\n..       ...   ...    ...      ...      ...   ...  ...\n2   程序设计基础B3  2016  100.0        5       必修    A+  4.0\n23  程序设计基础A7  2015  100.0        4       任选    A+  4.0\n21    机械制图D1  2016  100.0        3       任选    A+  4.0\n77      体育B7  2019  100.0        5       必修    A+  4.0\n38    线性代数A3  2019  100.0        1       限选    A+  4.0\n\n[101 rows x 7 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>subject</th>\n      <th>year</th>\n      <th>score</th>\n      <th>credits</th>\n      <th>property</th>\n      <th>grade</th>\n      <th>GPA</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>63</th>\n      <td>线性代数D7</td>\n      <td>2018</td>\n      <td>0.0</td>\n      <td>4</td>\n      <td>任选</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>37</th>\n      <td>大学物理B3</td>\n      <td>2018</td>\n      <td>0.0</td>\n      <td>3</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>电工电子技术C7</td>\n      <td>2020</td>\n      <td>0.0</td>\n      <td>5</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>54</th>\n      <td>机械制图B3</td>\n      <td>2019</td>\n      <td>0.0</td>\n      <td>4</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>71</th>\n      <td>程序设计基础D5</td>\n      <td>2016</td>\n      <td>55.0</td>\n      <td>3</td>\n      <td>限选</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>程序设计基础B3</td>\n      <td>2016</td>\n      <td>100.0</td>\n      <td>5</td>\n      <td>必修</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>程序设计基础A7</td>\n      <td>2015</td>\n      <td>100.0</td>\n      <td>4</td>\n      <td>任选</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>21</th>\n      <td>机械制图D1</td>\n      <td>2016</td>\n      <td>100.0</td>\n      <td>3</td>\n      <td>任选</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>77</th>\n      <td>体育B7</td>\n      <td>2019</td>\n      <td>100.0</td>\n      <td>5</td>\n      <td>必修</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>38</th>\n      <td>线性代数A3</td>\n      <td>2019</td>\n      <td>100.0</td>\n      <td>1</td>\n      <td>限选</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>101 rows × 7 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 397
    }
   ],
   "source": [
    "df = df.sort_values(by=\"score\",ascending=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 396,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "     subject  year  score  credits property grade  GPA\n71  程序设计基础D5  2016   55.0        3       限选     F  0.0\n86     微积分C5  2016   56.0        5       任选     F  0.0\n57  程序设计基础A5  2015   57.0        2       任选     F  0.0\n7   电工电子技术D4  2016   57.0        5       必修     F  0.0\n45    机械制图D8  2017   58.0        2       限选     F  0.0\n..       ...   ...    ...      ...      ...   ...  ...\n77      体育B7  2019  100.0        5       必修    A+  4.0\n9   电工电子技术C7  2020    0.0        5       必修     F  0.0\n37    大学物理B3  2018    0.0        3       必修     F  0.0\n54    机械制图B3  2019    0.0        4       必修     F  0.0\n63    线性代数D7  2018    0.0        4       任选     F  0.0\n\n[101 rows x 7 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>subject</th>\n      <th>year</th>\n      <th>score</th>\n      <th>credits</th>\n      <th>property</th>\n      <th>grade</th>\n      <th>GPA</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>71</th>\n      <td>程序设计基础D5</td>\n      <td>2016</td>\n      <td>55.0</td>\n      <td>3</td>\n      <td>限选</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>86</th>\n      <td>微积分C5</td>\n      <td>2016</td>\n      <td>56.0</td>\n      <td>5</td>\n      <td>任选</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>57</th>\n      <td>程序设计基础A5</td>\n      <td>2015</td>\n      <td>57.0</td>\n      <td>2</td>\n      <td>任选</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>电工电子技术D4</td>\n      <td>2016</td>\n      <td>57.0</td>\n      <td>5</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>45</th>\n      <td>机械制图D8</td>\n      <td>2017</td>\n      <td>58.0</td>\n      <td>2</td>\n      <td>限选</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>77</th>\n      <td>体育B7</td>\n      <td>2019</td>\n      <td>100.0</td>\n      <td>5</td>\n      <td>必修</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>电工电子技术C7</td>\n      <td>2020</td>\n      <td>0.0</td>\n      <td>5</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>37</th>\n      <td>大学物理B3</td>\n      <td>2018</td>\n      <td>0.0</td>\n      <td>3</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>54</th>\n      <td>机械制图B3</td>\n      <td>2019</td>\n      <td>0.0</td>\n      <td>4</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>63</th>\n      <td>线性代数D7</td>\n      <td>2018</td>\n      <td>0.0</td>\n      <td>4</td>\n      <td>任选</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>101 rows × 7 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 396
    }
   ],
   "source": [
    "# 使用自定义函数：apply\n",
    "df[\"grade\"] = df[\"score\"].apply(score_to_grade)\n",
    "df[\"GPA\"] = df[\"score\"].apply(lambda x: grade_to_GPA[score_to_grade(x)])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 366,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "     subject  year  score  credits property grade  GPA\n51     微积分C2  2020  100.0        1       任选    A+  4.0\n82    机械制图D9  2015  100.0        3       限选    A+  4.0\n2   程序设计基础B3  2016  100.0        5       必修    A+  4.0\n77      体育B7  2019  100.0        5       必修    A+  4.0\n68    机械制图B3  2018  100.0        3       任选    A+  4.0\n..       ...   ...    ...      ...      ...   ...  ...\n71  程序设计基础D5  2016   55.0        3       限选     F  0.0\n9   电工电子技术C7  2020    0.0        5       必修     F  0.0\n37    大学物理B3  2018    0.0        3       必修     F  0.0\n54    机械制图B3  2019    0.0        4       必修     F  0.0\n63    线性代数D7  2018    0.0        4       任选     F  0.0\n\n[101 rows x 7 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>subject</th>\n      <th>year</th>\n      <th>score</th>\n      <th>credits</th>\n      <th>property</th>\n      <th>grade</th>\n      <th>GPA</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>51</th>\n      <td>微积分C2</td>\n      <td>2020</td>\n      <td>100.0</td>\n      <td>1</td>\n      <td>任选</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>82</th>\n      <td>机械制图D9</td>\n      <td>2015</td>\n      <td>100.0</td>\n      <td>3</td>\n      <td>限选</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>程序设计基础B3</td>\n      <td>2016</td>\n      <td>100.0</td>\n      <td>5</td>\n      <td>必修</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>77</th>\n      <td>体育B7</td>\n      <td>2019</td>\n      <td>100.0</td>\n      <td>5</td>\n      <td>必修</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>68</th>\n      <td>机械制图B3</td>\n      <td>2018</td>\n      <td>100.0</td>\n      <td>3</td>\n      <td>任选</td>\n      <td>A+</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>71</th>\n      <td>程序设计基础D5</td>\n      <td>2016</td>\n      <td>55.0</td>\n      <td>3</td>\n      <td>限选</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>电工电子技术C7</td>\n      <td>2020</td>\n      <td>0.0</td>\n      <td>5</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>37</th>\n      <td>大学物理B3</td>\n      <td>2018</td>\n      <td>0.0</td>\n      <td>3</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>54</th>\n      <td>机械制图B3</td>\n      <td>2019</td>\n      <td>0.0</td>\n      <td>4</td>\n      <td>必修</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>63</th>\n      <td>线性代数D7</td>\n      <td>2018</td>\n      <td>0.0</td>\n      <td>4</td>\n      <td>任选</td>\n      <td>F</td>\n      <td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>101 rows × 7 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 366
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 398,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "year\n2015    3.080000\n2016    2.293846\n2017    2.669811\n2018    2.659574\n2019    2.652830\n2020    2.502632\ndtype: float64"
     },
     "metadata": {},
     "execution_count": 398
    }
   ],
   "source": [
    "# 使用apply函数实现两列的加权平均\n",
    "def wavg(group, avg_name, weight_name):\n",
    "    d = group[avg_name]\n",
    "    w = group[weight_name]\n",
    "    return (d * w).sum() / w.sum()\n",
    "\n",
    "GPA_year = df.groupby('year').apply(wavg, \"GPA\", \"credits\")\n",
    "# GPA_year\n",
    "df.groupby(['year','property']).apply(wavg, \"GPA\", \"credits\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 370,
   "metadata": {},
   "outputs": [],
   "source": [
    "GPA_year.to_csv(\"GPA_year.csv\",encoding=\"gbk\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": 3
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python_defaultSpec_1596635677499",
   "display_name": "Python 3.6.4 64-bit ('base': conda)"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}